********************************************************************************
* DATA CONSTRUCTION CODE FOR CAVAILLE, CHEN AND VAN DER STRAETEN (2023)        *
*                                                                              *
* REFERENCE "Who Cares? Measuring Differences in Preference Intensity"         *
* Political Science Research and Methods                                       *
* AUTHOR	Charlotte Cavaille                                                 *
* CONTACT	cavaille@umich.edu                                                 *
* VERSION	1.0 (Jan 2024)                                                     *
* SOFTWARE	STATA SE VERSION 18                                                *
********************************************************************************



*****************
* PROGRAM SETUP *
*****************

* Interpret the rest of the file under Stata 18 rules and start from an empty session.
version 18
clear all
set more off

* plotplainblind is a community-contributed scheme (blindschemes package), so it
* may be missing on a replicator's machine, in which case set scheme errors out
* and the whole run stops. No graph command appears in this file, so a missing
* scheme is reported and skipped instead of aborting the data construction.
capture set scheme plotplainblind, permanently
if _rc {
	display as text "note: scheme plotplainblind not found; default graph scheme left unchanged"
}

***************
* DIRECTORIES *
***************

* Project root on the author's machine. Replicators: edit this one line.
global pathown "/Users/cavaille/Dropbox/CO.who_cares/2.Data and Analyses"

* The replication folder holds the do-files; data, temp and out sit inside it.
global pathcode "$pathown/7.Replication file PSRM"
global pathdata "$pathcode/data"
global pathtemp "$pathcode/temp"
global pathout  "$pathcode/out"



********************************************************************************
********************************************************************************
**** BASE DATASET
** dataset_final.dta

********************************************************************************
********************************************************************************
**** DESCRIPTION OF RAW DATA AND CONSTRUCTION STEPS
** Survey data was collected over two waves. Each wave includes 2 parts. For each
** wave, the two parts have to be merged. In wave 1 the merge is based on ID and
** treatment; in wave 2 it is based on ID alone.
** This requires that I first deal with duplicates ("doublons") that share the
** same merge key. For this, I keep the observation that was recorded first in
** time and drop the others. Coding of the variables is wave specific
** because of differences in variable names. Once wave 1 and wave 2 have been
** constructed and their variables coded, I merge the waves using a respondent ID
** crossover table provided by IPSOS.



****
**** step 1: Construct wave 1 data
****

** Load part 1 of wave 1. clear is the documented option of use for discarding
** whatever is currently in memory.
use "$pathdata/part1_wave1_qualtrics.dta", clear

** Convert the identifiers and assignment variables to numeric in one pass.
destring block treatment id, replace

**- A small subset of respondents received a different version of QVSR.
** This was a pilot used for a separate study, so I drop these observations.
drop if treatment == 7

**- Generate the treatment (survey method) variable.
** [Within each treatment branch, we further randomised respondents into
** receiving versus not receiving a partisan prime. Our treatment failed to
** manipulate partisan identity and had no effect on policy preferences.
** We consequently pool primed and non-primed observations.]
** Codes not listed in the recode rules (e.g. 999) are left as they are.
gen method = treatment
recode method (1 4 = 1) (2 5 = 2) (3 6 = 3)

label variable method "Survey method treatment"
label define method_lbl 1 "Likert" 2 "Likert+" 3 "QVSR" ///
	999 "Dropped out before being assigned to treatment", add
label values method method_lbl

**- Drop respondents who dropped out before being assigned to treatment.
drop if method == 999

**- Drop observations (14 in total) generated by the authors during a
**- last-minute sanity check. The three drops are kept separate so that the log
**- shows the count removed by each rule.
**- test entries, no id
drop if id == .
**- CC's birthdate
drop if id == 13041984
**- ids over a billion were entered by CC
drop if id > 1000000000

*** Informed consent and sound check: minimal conditions for sample inclusion.

** ICN_Q2 == 1 is coded as consent, ICN_Q2 == 2 as refusal, anything else as missing.
gen ICN = cond(ICN_Q2 == 1, 1, cond(ICN_Q2 == 2, 0, .))

label variable ICN "Informed consent page"
label define ICN_lbl 0 "Did not consent" 1 "Consented", add
label values ICN ICN_lbl

**- Only consenting respondents stay in the data.
keep if ICN == 1

** Sound check: the rounded answer 4 counts as a pass; answers 1, 2, 3, 5 and a
** missing answer count as a fail.
gen sound = round(soundC)
recode sound (1 2 3 5 . = 0) (4 = 1)

label variable sound "Sound check"
label define sound_lbl 0 "Did not pass" 1 "Passed", add
label values sound sound_lbl

**- Only respondents who passed the sound check stay in the data.
keep if sound == 1

**-- Part 1 is matched to part 2 on id and method, so each id-method pair may
**-- appear only once (duplicates, "doublons", would block the 1:1 merge).

**- Put each id-method group in order of StartDate. The stable option keeps the
**- file order among rows that tie on all three keys, so the row treated as
**- "first" does not depend on Stata's random ordering of ties between runs.
sort id method StartDate, stable

**- dup_p1 is 0 for unique id-method pairs and 1, 2, ... within duplicated pairs.
quietly by id method: gen dup_p1 = cond(_N == 1, 0, _n)
tab dup_p1
**--- 47 doublons [several people do the same treatment several times]

**- Position of each row within its id-method group, earliest StartDate first.
quietly by id method: gen n_p1 = _n

** Keep the first submission of every id-method pair. Unique pairs have
** n_p1 == 1 as well, so this single condition covers both cases.
keep if n_p1 == 1

drop dup_p1 n_p1


save "$pathtemp/part1w1_temp.dta", replace

****
**** now turn to part 2 wave 1
****


clear all
** clear is the documented option of use for replacing the data in memory.
use "$pathdata/part2_wave1_qualtrics.dta", clear

**-- Drop the authors' last-minute test entries. id is still a string here, so
**-- the non-numeric test ids have to go before it can be converted.
drop if inlist(id, "", "undefined", "testest", "13041984")
destring id, replace
**-- ids over a billion were entered by the authors
drop if id > 1000000000


**-- Treatment variable (called treat in part 2, not treatment as in part 1).
destring treat, replace

**-- Drop QVSRN (treatment code 7).
drop if treat == 7

**-- Pool the six treatment codes into the three survey methods.
gen method = treat
recode method (1 4 = 1) (2 5 = 2) (3 6 = 3)

label variable method "Survey method treatment"
label define method_lbl 1 "Likert" 2 "Likert+" 3 "QVSR" ///
	999 "Dropped out before being assigned to treatment", add
label values method method_lbl

**- By definition respondents are already assigned to a treatment by the time
**- they reach part 2, so nothing is dropped for missing assignment.

**-- Part 2 is matched to part 1 on id and method, so each id-method pair may
**-- appear only once.

**- Put each id-method group in order of StartDate. The stable option keeps the
**- file order among rows that tie on all three keys, so the row treated as
**- "first" does not depend on Stata's random ordering of ties between runs.
sort id method StartDate, stable

**- dup_p2 is 0 for unique id-method pairs and 1, 2, ... within duplicated pairs.
quietly by id method: gen dup_p2 = cond(_N == 1, 0, _n)
tab dup_p2
**--- 102 doublons [several people do the same treatment several times]

**- Position of each row within its id-method group, earliest StartDate first.
quietly by id method: gen n_p2 = _n

** Keep the first submission of every id-method pair. Unique pairs have
** n_p2 == 1 as well, so this single condition covers both cases.
keep if n_p2 == 1

drop dup_p2 n_p2

save "$pathtemp/part2w1_temp.dta", replace



****
**** merge part 1 and 2 , wave 1
****

** clear is the documented option of use for replacing the data in memory.
use "$pathtemp/part1w1_temp.dta", clear
merge 1:1 id method using "$pathtemp/part2w1_temp.dta"

*** More than half of the 161 observations that could not be merged are QVSR.
*** Some people dropped out. Others restarted but, unfortunately, there is no way
*** to match these observations without strong assumptions that the order in
*** part 1 matches the order in part 2.
*** As a result, more observations are lost in QVSR, so analyses run on unique
*** respondents have fewer observations in QVSR. This is partly an artifact of
*** how Ipsos counted completes. We did not correct for doublons when counting
*** completes: the same individuals who were retaking the survey were counted as
*** a unique complete (respondent) when in fact they were a unique observation.
tab _merge method, miss
keep if _merge == 3


*** How many people took the survey multiple times in the final wave 1 dataset?
*** 27 took it at least twice.
*** No evidence that this is more frequent in one treatment condition than another.
*** dup is 0 for ids that appear once and 1, 2, ... for ids that appear repeatedly.
quietly bys id: gen dup = cond(_N == 1, 0, _n)
tab dup method, col
**[these repeat ids are kept, to see which one merges with wave 2, if any]

*** Bottom line: because of glitches in QVSR, we lose more observations there
*** (doublons we cannot match).
*** Once this is addressed, no differences across treatments in doublons.

drop dup _merge

****
**** add the SES data from Ipsos
****

destring xideo xpppa1690, replace

** Several survey rows may share an id, hence m:1. Rows that exist only in the
** SES file are not brought in, so the merge indicator can only be 1 (survey
** row without SES data) or 3 (matched).
merge m:1 id using "$pathdata/ses_ipsos.dta", keep(master match)

** Turn the merge indicator into a 0/1 flag: 3 (matched) becomes 0, 1 stays 1.
rename _merge missSES
recode missSES (3 = 0)

label variable missSES "flag who was not given SES data by IPSOS"
label define missSES_lbl 0 "in the SES file" 1 "not in SES file", add
label values missSES missSES_lbl


****
**** Code wave 1 variables
****

do "$pathcode/0.code_var_w1_F.do"


***-- Save wave 1 under a wave-specific id name, then remove the two
***-- intermediate part files, which are no longer needed.
rename id id_w1
save "$pathtemp/wave1_final.dta", replace

erase "$pathtemp/part1w1_temp.dta"
erase "$pathtemp/part2w1_temp.dta"



****
**** step 2: Construct wave 2 data
****


** clear is the documented option of use for replacing the data in memory.
use "$pathdata/part1_wave2_qualtrics.dta", clear

** Align the wave 2 names with wave 1: mno is the respondent id, xtreat the method.
rename (mno state2 xtreat) (id session method)

** Same exclusions as in wave 1: pilot branch 7, missing id, no consent.
drop if method == 7
drop if id == .
drop if ICN_Q2 == "No"
**-- no sound check here
**-- no assignment to treatment either: respondents were simply recontacted and
**-- given the same treatment as in wave 1

** Order of each row within its id-session group, by StartDate.
** NOTE(review): n is not used again in this file; it is left in place because
** the wave 2 coding do-file or the final keep ranges may rely on it. Confirm.
sort id session StartDate, stable
quietly by id session: gen n = _n

**-- Part 1 will be matched to part 2 on id, so doublons on id must be resolved.

**- Put each id group in order of StartDate. The stable option keeps the current
**- order among rows that tie on both keys, so the row treated as "first" does
**- not depend on Stata's random ordering of ties between runs.
sort id StartDate, stable

**- dup_p1 is 0 for unique ids and 1, 2, ... within duplicated ids.
quietly by id: gen dup_p1 = cond(_N == 1, 0, _n)
tab dup_p1
**--- 74 doublons

**- Position of each row within its id group, earliest StartDate first.
quietly by id: gen n_p1 = _n

** Keep the first submission of every id. Unique ids have n_p1 == 1 as well,
** so this single condition covers both cases.
keep if n_p1 == 1

drop dup_p1 n_p1

save "$pathtemp/part1w2_temp.dta", replace

**-- part 2
** clear is the documented option of use for replacing the data in memory.
use "$pathdata/part2_wave2_qualtrics.dta", clear

** id is a string here; remove the non-numeric placeholder before converting.
drop if id == "undefined"
destring id, replace

**-- Part 2 is matched to part 1 on id, so each id may appear only once.

**- Put each id group in order of StartDate. The stable option keeps the file
**- order among rows that tie on both keys, so the row treated as "first" does
**- not depend on Stata's random ordering of ties between runs.
sort id StartDate, stable

**- dup_p2 is 0 for unique ids and 1, 2, ... within duplicated ids.
quietly by id: gen dup_p2 = cond(_N == 1, 0, _n)
tab dup_p2
**--- 92 doublons

**- Position of each row within its id group, earliest StartDate first.
quietly by id: gen n_p2 = _n

** Keep the first submission of every id. Unique ids have n_p2 == 1 as well,
** so this single condition covers both cases.
keep if n_p2 == 1

drop dup_p2 n_p2

save "$pathtemp/part2w2_temp.dta", replace

**-- merge the 2 parts of wave 2
** clear is the documented option of use for replacing the data in memory.
use "$pathtemp/part1w2_temp.dta", clear
merge 1:1 id using "$pathtemp/part2w2_temp.dta"


*** Again more than half of the 161 that could not be merged are QVSR
*** NOTE(review): this figure repeats the wave 1 note word for word; confirm it
*** against the tabulation below.
tab _merge method, miss


** Only respondents present in both parts are kept.
keep if _merge == 3
drop _merge



*** How many people took the survey multiple times in the final wave 2 dataset?
*** none
*** dup is 0 for ids that appear once and 1, 2, ... for ids that appear repeatedly.
quietly bys id: gen dup = cond(_N == 1, 0, _n)
tab dup method, col
drop dup



****
**** step 2 (continued): code wave 2 variables
****

** The id gets its wave-specific name before the coding do-file runs.
rename id id_w2

do "$pathcode/0.code_var_w2_F.do"


save "$pathtemp/wave2_final.dta", replace

** The two intermediate part files are no longer needed.
erase "$pathtemp/part1w2_temp.dta"
erase "$pathtemp/part2w2_temp.dta"





****
**** Step 3: to merge wave1 and wave 2, need to use the id cross over table 
****



*** Merge wave 2 with the crossover table [one observation is not in crossover].
*** Only rows found in both files are kept, and no merge indicator is left behind.

** clear is the documented option of use for replacing the data in memory.
use "$pathtemp/wave2_final.dta", clear
merge 1:1 id_w2 using "$pathdata/mno_id_W1W2.dta", keep(match) nogenerate

** Overwrite the wave 2 file with the version that carries the crossover columns.
save "$pathtemp/wave2_final.dta", replace


****
**** Step 4: merge both waves
****

** clear is the documented option of use for replacing the data in memory.
use "$pathtemp/wave1_final.dta", clear

** Wave 1 and wave 2 are linked on the wave 1 id and the survey method.
** NOTE(review): force lets the merge go through when a variable is string in
** one file and numeric in the other, and the values from the using file are
** then lost for that variable. Confirm that no variable needed later is affected.
merge 1:1 id_w1 method using "$pathtemp/wave2_final.dta", force

** Keep the merge indicator as a wave-membership flag.
rename _merge dataT


** NOTE(review): the variable label mentions wave 1 only, but dataT has three
** categories (see the value labels). Text left unchanged to keep the output
** file identical.
label variable dataT "flag who is in wave 1 only"
label define dataT_lbl 1 "In wave 1, not wave 2" 2 "In wave 2 only" 3 "In both", add
label values dataT dataT_lbl

tab dataT method, col row miss

rename id_w1 id


** The per-wave files are no longer needed once they are merged.
erase "$pathtemp/wave1_final.dta"
erase "$pathtemp/wave2_final.dta"


****
**** Code missing variables (need both waves together to do this)
****


****** Wave 1 predictor for wave 2 outcome punish
*** Difference between gun and immigration (wall) preferences, rescaled within
*** each survey method as (diff - min) / (max - min).

gen diff = votes_gunw1 - votes_wall_inw1

** Method 1: min and max are taken over every method 1 row.
** NOTE(review): methods 2 and 3 below take min and max only over rows with a
** non-missing punished, while method 1 does not apply that filter. Confirm
** whether the difference is intended.
summarize diff if method == 1
gen diffN1 = (diff - r(min)) / (r(max) - r(min)) if method == 1

** Methods 2 and 3: min and max are taken over rows with non-missing punished,
** and the rescaling is then applied to every row of the method.
foreach m of numlist 2 3 {
	summarize diff if method == `m' & punished < .
	gen diffN`m' = (diff - r(min)) / (r(max) - r(min)) if method == `m'
}

** Stack the three method-specific versions into a single variable.
gen diffN = cond(method == 1, diffN1, cond(method == 2, diffN2, cond(method == 3, diffN3, .)))


** The same construction for the LPP2 and LPP3 versions of the vote variables.
** The three loops are kept separate so that the new variables are created in
** the same order as before; the final keep statement uses variable ranges.
local variants LPP2 LPP3

foreach v of local variants {
	gen diff_`v' = votes_gunw1`v' - votes_wall_inw1`v'
}

** Rescaled version for method 2 rows, range taken over non-missing punished.
foreach v of local variants {
	summarize diff_`v' if method == 2 & punished < .
	gen diff`v'N2 = (diff_`v' - r(min)) / (r(max) - r(min)) if method == 2
}

** Final predictor: the variant-specific value for method 2, diffN otherwise.
foreach v of local variants {
	gen diff`v'N = cond(method == 2, diff`v'N2, diffN)
}





** Sanity check that nobody is one observation with two treatments: for every
** issue, cross-tabulate each pair of the three method-specific answer variables.

local issues AA gun wall paidL gender gay minW abortion deficit enviro

** First variable set (suffix L) against the QV set
foreach stub of local issues {
	tab `stub'L QV`stub'
}

** First variable set (suffix L) against the second (suffix Lp)
foreach stub of local issues {
	tab `stub'L `stub'Lp
}

** Second variable set (suffix Lp) against the QV set
foreach stub of local issues {
	tab `stub'Lp QV`stub'
}


** Keep the analysis variables. The a-b entries are variable ranges, so the
** selection depends on the current variable order in memory.
keep id session xideo xpppa1690 party7 block vote3 ideo3 party7a method ICN ///
	QVgay-QVenviro xZIP-don_C_wall_inST ///
	id_w2 QVgayw2-QVenvirow2 AALw2-enviroLpw2 AALpp1w2-proportion ///
	wrote-diffLPP3N

** Write the base dataset used by the analysis files.
save "$pathout/dataset_final.dta", replace




